/* ========================================================================= */
/*              Copyright (C) 2000 Texas Instruments Incorporated.           */
/*                              All Rights Reserved                          */
/* ========================================================================= */
/* The code scales 3 planes of y, Cr, Cb data in varying combinations        */
/* where Y denotes luma, Cr is red chrominance and Cb is blue chrominance    */
/* NOTES                                                                     */
/* When the phrase "pixels" is used this measn a color pixel, this has       */
/* a color combination of  4:2:2, 4:4:4 or 4:2:0                             */
/* for 4:4:4 there are 3 identical coloe spaces all the same size            */
/* for 4:2:2 there planes are the same height but only half as wide          */
/* for 4:2:0 the planes are 1/2 the width and 1/2 the height                 */
/*                                                                           */
/* The scaler will automatically filter output data only within the display  */
/* if it is larger than the display it will write to the boundary, if smaller*/
/* it will centre the image in the middle.                                   */
/*                                                                           */
/* This function can be applied to R, G, B data also R->Y, G->Cr, B->Cb      */
/* ========================================================================= */
/*              Copyright (C) 2000 Texas Instruments Incorporated.           */
/*                              All Rights Reserved                          */
/* ========================================================================= */

#include <csl.h>    
#include <csl_dat.h>

#include "ycbcr422pl16_to_rgb565_h.h"  
#include "scale_horz_h.h"
#include "scale_vert_h.h"
#include "pix_expand_h.h"

#define  f4_4_4 2
#define  f4_2_2 1
#define  f4_2_0 0 
#pragma  CODE_SECTION(scale_2d_ycrcb,".opt_gp");
/* ========================================================================= */
void scale_2d_ycrcb (
  int     format,           /* 4:4:4 == 2, 4:2:2 == 1, 4:2:0 == 0            */

  int     p_h,              /* numerator for horizontal scale factor p/q     */
  int     q_h,              /* denominator for horizontal scale factor p/q   */

  short * hpatch,           /* stride table for cr/b horizontal scaling jumps*/
  short * hh,               /* pntr to horz filter bank                      */
  int     l_hh,             /* number of taps in each horz filter            */
  int     n_hh,             /* number of filters in horz filter bank         */

  int     p_v,              /* numerator for vertical scale factor p/q       */
  int     q_v,              /* denominator for vertical scale factor p/q     */

  short * vpatch,           /* stride table for vertical scaling jumps       */
  short * hv,               /* pntr to vert filter bank                      */
  int     l_hv,             /* number of taps in each vert filter            */
  int     n_hv,             /* number of filters in vert filter bank         */

  int     in_pic_height,    /* height of picture in Pixels multiple of 2     */
  int     in_pic_width,     /* width  of picture in Pixels multiple of 8     */

  int     display_height,   /* height of picture in Pixels multiple of 2     */
  int     display_width,    /* width  of picture in Pixels multiple of 8     */

  unsigned char * image_y,  /* pntr to input image planar y values           */
  unsigned char * image_cr, /* pntr to input image planar cr values          */
  unsigned char * image_cb, /* pntr to input image planar cb values          */
  void  * L2scratch_pad,    /* temp space for internal buffers and stores    */
                            /* aligned to a double boundary                  */
  unsigned char * image_rgb)/* pntr to output image, scaled in color         */
/* ========================================================================= */
{
  short * vpatchy;     /* copy of patch table for y scaling */
  short * vpatchc;     /* copy of patch table for cr and cb scaling */
  short * t_filt;      /* pntr to temporary buffer length l_hh */

  int y_width;         /* width of raw y plane image */
  int y_height;        /* height of raw y plane image */
  int in_y_width;      /* width of data to be scaled, sub section of y buffer */
  int in_y_offset;     /* offset from left edge of raw Y plane buffer */
  int in_y_offset_1;   /* offset from left edge of raw Y plane buffer */
  int y_rd_index;      /* index into the raw Y buffer */
  int y_filter_no;     /* the current vertical filter being used for the 
                          vertical filtering of Y */

  int out_y_width;     /* width of data scaled Y buffer */
  int main_out_y_width; /* width of scaled chroma image */
  int total_out_y_width;/* width of scaled chroma image */
  int hylines;         /* number of horizontally scaled y lines 
                          put in y r(otating)b(uffer) */
  int y_rb_write_line; /* index into y r(otating)b(uffer) */
  int y_rb_read_line;  /* index into y r(otating)b(uffer) for removal */

  int out_color_offset;/* offset into the display buffer depends on display 
                          and scaled picture size */

  int in_vert_offset;  /* generic intermediate offset to top of scaled image */

  int in_c_offset;     /* offset to (cropped) corner of raw chroma images */
  int in_c_offset_1;   /* offset to (cropped) corner of raw chroma images */
  int c_width;         /* width of chroma raw image */
  int in_c_width;      /* width of cropped raw chroma image */
  int out_c_width;     /* width of scaled chroma image */
  int c_rd_index;      /* index into the raw C buffers */
  int hclines;         /* number of horizontally scaled c lines 
                          put in c r(otating)b(uffer) */
  int c_rb_write_line; /* index into c r(otating)b(uffer) for addition*/

  int c_rb_read_line;  /* index into c r(otating)b(uffer) for removal */
  int c_filter_no;     /* the current vertical filter being used for the 
                          vertical filtering of C */

  int max_clines;      /* number of color lines needed by colr format
                          e.g. 444 needs 2 lines, 420 needs only 1 */

  int display_lines;   /* number of color lines written to display so far */
  int display_pixels;  /* number of color pixels written to display so far */
  int window_height;   /* number of lines to display */

  int color;           /* number of color elements/pixel rgb=3, 4:2:2 = 1.5 */
  int color_shift;     /* divider for whether output is 422,420 or 444 */

  unsigned char *buf_y0;/*pointer to 1st pingpong buffer for y raw image data */
  int    in_y_buf_size;/* size of the input ping or pong buffer */
  short  *buf_y1;      /* pointer to 2nd buffer containing expanded y pixels  */
  short  *rot_buf_y2;  /* pntr to circular line buffer containing l_hv lines of 
                          horizontally scaled raw y data */
  short  *buf_y3;      /* pntr to output buffer 2 lines of vertically & 
                          horizontally scaled y data ready for combining 
                          into color pixels */
  short *y_filt_state; /* packed last filter number and input index */

  unsigned char *buf_cr0;/* pointer to 1st ping pong buffer for c raw image data */
  int    in_c_buf_size;/* size of the input ping or pong buffer */
  short  *buf_cr1;     /* pointer to 2nd buffer containing expanded c pixels  */
  short  *rot_buf_cr2; /* pntr to circular line buffer containing l_hv lines of
                          horizontally scaled raw c data */
  short  *buf_cr3;     /* pntr to buffer of 2 lines of vertically and horizontally
                          scaled c data ready for combining into color pixels */

  unsigned char *buf_cb0;/* pointer to 1st ping pong buffer for c raw image data */
  short  *buf_cb1;     /* pntr to 2nd buffer containing expanded c pixels */
  short  *rot_buf_cb2; /* pntr to circular line buffer containing l_hv lines of
                          horizontally scaled raw c data */
  short  *buf_cb3;     /* pntr to buffer of 2 lines of vertically & horizontally
                          scaled c data ready for combining into color pixels */
  short  *c_filt_state;/* packed last filter number and input index */

  int    y_scaled;     /*num. of finished y lines in output buffer pre color*/
  int    c_scaled;     /*num. of finished c clines in output buffer pre color*/
  unsigned char *buf_rgb; /* pntr to ping pong buffer containing 2 lines of 
                             color pixels formed from the y,cr,cb data */

  short cmatrix[5] = {0x2543, 0x3313, -0x0C8A, -0x1A04, 0x408D};                             
  unsigned char *gp_store; /* pointer to the scratchpad space */

  int    store_index;      /* index into main scratch pad for buffers */
  int    divider;          /* number of strips needed for cache allocation */
  int    i;                /* current strip being worked on */

  int ping_in_y, pong_in_y;/* tags for buffers, ping or pong */
  int ping_in_c, pong_in_c;/* tags for buffers, ping or pong */
  unsigned int  in_id_y ;  /* input EDMA xfer id             */
  unsigned int  in_id_cr;  /* input EDMA xfer id             */
  unsigned int  in_id_cb;  /* input EDMA xfer id             */
  int           first;     /* tag for out buffer             */
  unsigned int  out_id0;   /* output EDMA xfer id            */
  int ping_out, pong_out;  /* tags for buffers, ping or pong */
  int write_buf;

  
/* ========================= set up dimensions ============================== */
/* and align all offsets to 8 element boundaries and counts to mutliples  */
/* of 8 elements */

  color = 2;               /* each color is now 2 bytes, 565 */  

  y_width = in_pic_width;
  y_height = in_pic_height;
  
  /* the scaled output width is bigger than the display ? */
  if (p_h * in_pic_width >=  q_h * display_width)
  {
     in_y_offset = (y_width * p_h  - q_h * display_width)/(2*p_h);
     in_y_width  = (q_h * display_width)/p_h;
     out_y_width = display_width;
     out_color_offset =  0;
  /* the scaled image width is smaller than the display ? */
  } else
  {
     in_y_offset = 0;
     in_y_width  = y_width;
     out_y_width = (y_width * p_h) / q_h;
     out_color_offset = color*((display_width * q_h - p_h * y_width)/(q_h*2));
  }

  /* the scaled output is bigger than the display ? */
  if (p_v * in_pic_height >= q_v * display_height)
  {
     in_vert_offset= y_width*((y_height * p_v  - q_v * display_height)/(2*p_v));
     window_height = display_height;
  /* the scaled image is smaller than the display ? */
  } else
  {
     in_vert_offset = 0;
     window_height = (in_pic_height * p_v)/q_v;
     window_height = (window_height + 2) & ~ 1;
     out_color_offset += color * display_width *
                         ((display_height * q_v - p_v * y_height)/(2*q_v));
  }                  
  out_color_offset &= ~3; 

  if (format == f4_4_4)
  {
     in_c_offset = in_y_offset + in_vert_offset;
     c_width     = y_width;
     in_c_width  = in_y_width;
     out_c_width = out_y_width;
     max_clines = 2;
     color_shift = 0;
  } else if (format == f4_2_2)
  {
     c_width = y_width >> 1;
     in_c_offset = in_y_offset ? (c_width*p_h - q_h*(display_width>>1))/(2*p_h):0;
     in_c_offset += (in_vert_offset >> 1);
     in_c_width = in_y_width >> 1;
     out_c_width = out_y_width >> 1;
     max_clines = 2;
     color_shift = 1;
  } else /* format == f4_2_0 */
  {
     c_width = y_width >> 1;
     in_c_offset = in_y_offset ? (c_width*p_h - q_h*(display_width>>1))/(4*p_h) : 0;
     in_c_offset += (in_vert_offset >> 2);
     in_c_width = in_y_width >> 1;
     out_c_width = out_y_width >> 1;
     max_clines = 1;
     color_shift = 1;
  }
  in_y_offset += in_vert_offset;
  in_y_offset_1 = in_y_offset + (hpatch[1] >> 1);
  in_c_offset_1 = in_c_offset + (hpatch[1] >> 1);

/* ========================= set up buffers ============================= */
  gp_store = (unsigned char *) L2scratch_pad;

  /* scratch pad is already set up to be aligned on a double boundary */
  /* Y buffers */
  store_index = 16;
  store_index += 2*l_hh + 16 + 32;
  store_index += 2*((in_y_width & ~7) + 16);
  store_index += sizeof(short)*((in_y_width & ~7) + 32); /* */
  store_index += sizeof(short)*((out_y_width & ~7) + 16)*l_hv;
  store_index += 2*sizeof(short)*((out_y_width & ~7) + 16);
  store_index += 2*((in_c_width & ~7) + 16);
  store_index += sizeof(short)*((in_c_width & ~7) + 32); /* */
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv;
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16);
  /* Cb buffers */
  store_index += 2*((in_c_width & ~7) + 16);
  store_index += sizeof(short)*((in_c_width & ~7) + 32); /* */
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv;
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16);

  /* total data required is now known */
  divider = (store_index + 24*1024)/(16*1024);

  main_out_y_width = out_y_width ; 
  in_y_width /= divider;
  out_y_width /= divider;
  in_c_width /= divider;

  /* round up to nearest end */
  total_out_y_width = 0;
  out_y_width += 8;  
  out_y_width &= ~7;
  out_c_width = out_y_width >> color_shift;

  /* Y buffers */
  store_index = 0;
  t_filt = (short *) &gp_store[0];
  store_index += 2*l_hh + 16;
  y_filt_state = (short *) &gp_store[store_index];
  store_index += 16;
  c_filt_state = (short *) &gp_store[store_index];
  store_index += 16;

  buf_y0 = &gp_store[store_index]; 
  store_index += 2*((in_y_width & ~7) + 32); /* 16 */
  store_index += 64;

  buf_y1 = (short *) &gp_store[store_index];
  store_index += sizeof(short)*((in_y_width & ~7) + 16);
  store_index += 64;

  rot_buf_y2 = (short *) &gp_store[store_index];
  store_index += sizeof(short)*((out_y_width & ~7) + 16)*l_hv;

  buf_y3 = (short *) &gp_store[store_index];
  store_index += 2*sizeof(short)*((out_y_width & ~7) + 16);
 
  /* Cr buffers */
  buf_cr0 = &gp_store[store_index]; 
  store_index += 2*((in_c_width & ~7) + 32); /* 16 */
  store_index += 64;

  buf_cr1 = (short *) &gp_store[store_index];
  store_index += sizeof(short)*((in_c_width & ~7) + 16);
  store_index += 64;

  rot_buf_cr2 = (short *) &gp_store[store_index];
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv;

  buf_cr3 = (short *) &gp_store[store_index];
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16);

  /* Cb buffers */
  buf_cb0 = &gp_store[store_index]; 
  store_index += 2*((in_c_width & ~7) + 32); /* 16 */
  store_index += 64;

  buf_cb1 = (short *) &gp_store[store_index];
  store_index += sizeof(short)*((in_c_width & ~7) + 16);
  store_index += 64;

  rot_buf_cb2 = (short *) &gp_store[store_index];
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv;

  buf_cb3 = (short *) &gp_store[store_index];
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16);
        
  /* final color buffer */
  /* this is a double buffer holding a total of 4 lines of color data */
  buf_rgb = &gp_store[store_index];


/* ========================= main scaling loop =========================== */
  /* initialise initial filter count scaled by filt length */

  y_filt_state[0] = c_filt_state[0] = l_hh*n_hh;
  y_filt_state[1] = c_filt_state[1] = 0;
  y_filt_state[2] = c_filt_state[2] = 0;
  y_filt_state[3] = c_filt_state[3] = 0;
  y_filt_state[4] = c_filt_state[4] = 0;
  y_filt_state[5] = c_filt_state[5] = 0;

  for (i=0; i < divider; i++)
  {
   total_out_y_width += out_y_width;
   if (i == divider - 1)
   {
     out_y_width += main_out_y_width - total_out_y_width;
     out_y_width &= ~3;
     out_c_width = out_y_width >> color_shift;
   }
   vpatchy = vpatch;
   vpatchc = vpatch;
   y_rd_index = c_rd_index = 0;
   y_rb_read_line = c_rb_read_line = 0;
   display_lines = 0;
   display_pixels = 0;
   hylines = hclines = 0;
   y_rb_write_line = c_rb_write_line = 0;
   first = 1;

   y_filter_no = c_filter_no = 0;
   y_scaled = c_scaled = 0;

   in_y_offset += y_filt_state[2];
   in_y_offset_1 += y_filt_state[3];

   in_c_offset += c_filt_state[2];
   in_c_offset_1 += c_filt_state[3];

   in_y_buf_size = ((in_y_width & ~15) + 48);  /* 7, 32 */
   in_c_buf_size = ((in_c_width & ~15) + 48);  /* 7, 32 */
 //  in_y_buf_size = (in_y_width+16) & ~15;  /* 7, 32 */
 //  in_c_buf_size = (in_c_width+16) & ~15;  /* 7, 32 */
 
  
  /* 1st dma transfer to get the ball rolling */
   ping_in_y = 1; pong_in_y = 0;
   ping_in_c = 1; pong_in_c = 0;

   in_id_y = DAT_copy(&image_y[(in_y_offset & ~7) + (y_rd_index & ~7)],
                 &buf_y0[pong_in_y * in_y_buf_size],
                 (unsigned short) in_y_buf_size);   

   in_id_cr = DAT_copy(&image_cr[(in_c_offset & ~7) + (c_rd_index & ~7)],
                 &buf_cr0[pong_in_c * in_c_buf_size],
                 (unsigned short) in_c_buf_size);   

   in_id_cb = DAT_copy(&image_cb[(in_c_offset & ~7) + (c_rd_index & ~7)],
                 &buf_cb0[pong_in_c * in_c_buf_size],
                 (unsigned short) in_c_buf_size);   

   ping_out = 0;
   pong_out = 0;
   write_buf = 0;
   

   while(display_lines < window_height)
   {
     if (display_lines >= (window_height - 6)) break; //JS
     
     if (hylines < l_hv)
     {  
        /* wait on previous transfer */
        DAT_wait(in_id_y);
        /* transfer data for next line */
        in_id_y = DAT_copy(&image_y[(in_y_offset & ~7) + 
                                    ((y_rd_index + y_width) & ~7)],
                           &buf_y0[ping_in_y * in_y_buf_size],
                           (unsigned short) in_y_buf_size);   

        pix_expand_asm(in_y_buf_size,
                      &buf_y0[pong_in_y * in_y_buf_size],
                      buf_y1);

        hpatch[0] = 2*(in_y_offset & 7) + 2*(y_rd_index & 7);
        hpatch[1] = 2*(in_y_offset_1 & 7) + 2*(y_rd_index & 7);
               
        scale_horz_asm(buf_y1,
                      in_y_width, 
                      &rot_buf_y2[((out_y_width & ~7)+8)*y_rb_write_line],
                      out_y_width,
                      hh,
                      l_hh,
                      n_hh,
                      hpatch, y_filt_state);
        
        pong_in_y ^= 1; ping_in_y ^= 1;
        y_rd_index += y_width;
        hylines += 1;
        y_rb_write_line += 1;
        if (y_rb_write_line == l_hv) y_rb_write_line = 0;
     }  
     
     if (hclines < l_hv)
     {
        DAT_wait(in_id_cr);
        in_id_cr = DAT_copy(&image_cr[(in_c_offset & ~7) +
                                      ((c_rd_index + c_width) & ~7)],
                            &buf_cr0[ping_in_c*in_c_buf_size],
                            (unsigned short) in_c_buf_size);   
        
        DAT_wait(in_id_cb);
        in_id_cb = DAT_copy(&image_cb[(in_c_offset & ~7) +
                                      ((c_rd_index + c_width) & ~7)],
                            &buf_cb0[ping_in_c*in_c_buf_size],
                            (unsigned short) in_c_buf_size);
        
        pix_expand_asm(in_c_buf_size,
                      &buf_cr0[pong_in_c * in_c_buf_size],
                      buf_cr1);
        pix_expand_asm(in_c_buf_size,
                      &buf_cb0[pong_in_c * in_c_buf_size],
                      buf_cb1);
        
        hpatch[0] = 2*(c_rd_index & 7) + 2*(in_c_offset & 7);      
        hpatch[1] = 2*(c_rd_index & 7) + 2*(in_c_offset_1 & 7);      
        
        scale_horz_asm(buf_cr1,
                       in_c_width, 
                       &rot_buf_cr2[((out_c_width & ~7)+8)*c_rb_write_line],
                       out_c_width,
                       hh,
                       l_hh,
                       n_hh,
                       hpatch, c_filt_state);
                        
        scale_horz_asm(buf_cb1,
                       in_c_width, 
                       &rot_buf_cb2[((out_c_width & ~7)+8)*c_rb_write_line],
                       out_c_width,
                       hh,
                       l_hh,
                       n_hh,
                       hpatch, c_filt_state);
                          
        pong_in_c ^= 1; ping_in_c ^= 1;
        hclines += 1;
        c_rd_index += c_width;

        c_rb_write_line += 1;
        if (c_rb_write_line == l_hv) c_rb_write_line = 0;
     }
     

     if (hylines >= l_hv) /* enough lines for vertical y filtering? */
     {
    	scale_vert_asm(rot_buf_y2,
                      &buf_y3[y_scaled*((out_y_width & ~7)+8)],
                      (out_y_width & ~7)+8,
                      &hv[y_filter_no*l_hv],
                      t_filt,
                      l_hv,
                      y_rb_read_line);

        y_scaled += 1;
        y_rb_read_line += ((int) (vpatchy[0]));
        y_rb_read_line = y_rb_read_line % l_hv;
        hylines -= ((int) (vpatchy[0]));

        vpatchy++;
        y_filter_no++;
        if (y_filter_no == n_hv) vpatchy = vpatch;
        if (y_filter_no == n_hv) y_filter_no = 0;
     }

     /* enough lines for vertical chroma filtering? */
     if (hclines >= l_hv && c_scaled < max_clines)  
     {
    	scale_vert_asm(rot_buf_cb2,
                      &buf_cb3[c_scaled*((out_c_width & ~7)+8)],
                      (out_c_width & ~7) + 8,
                      &hv[c_filter_no*l_hv],
                      t_filt,
                      l_hv,
                      c_rb_read_line);

    	scale_vert_asm(rot_buf_cr2,
                      &buf_cr3[c_scaled*((out_c_width & ~7)+8)],
                      (out_c_width & ~7) + 8,
                      &hv[c_filter_no*l_hv],
                      t_filt,
                      l_hv,
                      c_rb_read_line);
        c_scaled += 1;

        c_rb_read_line += ((int) (vpatchc[0]));
        c_rb_read_line = c_rb_read_line % l_hv;
        hclines -= ((int) (vpatchc[0]));

	    vpatchc++;
        c_filter_no++;
        if (c_filter_no >= n_hv) vpatchc = vpatch;
        if (c_filter_no >= n_hv) c_filter_no = 0;
     }

   /* color conversion can only occur when hylines >= l_hv and
      hcrlines >= l_hv but depending on the format c*lines should be reused */

     if (format != f4_2_0 && y_scaled == 2 && c_scaled == 2)
     {  
        ycbcr422pl16_to_rgb565_asm(cmatrix,
                &buf_y3[0],
                &buf_cr3[0],
                &buf_cb3[0],
                (unsigned short *) &buf_rgb[2*ping_out*color*out_y_width],
                out_y_width); /* in L2 */
        ycbcr422pl16_to_rgb565_asm(cmatrix,
                &buf_y3 [(out_y_width & ~7) + 8],
                &buf_cr3[(out_c_width & ~7) + 8],
                &buf_cb3[(out_c_width & ~7) + 8],
                (unsigned short *) &buf_rgb[(1+2*ping_out)*color*out_y_width],
                out_y_width); /*<-in L2 */
         ping_out ^= 1;
     }
     /* this is done to fix the anomally of red being 
        swapped for blue chrom - the api say ycbcr, I say it
        is ycrcb as is the convention */
        
     if (format == f4_2_0 && y_scaled == 2 && c_scaled == 1)
     { 
        ycbcr422pl16_to_rgb565_asm(cmatrix,
                &buf_y3[0],
                &buf_cr3[0], &buf_cb3[0],
                (unsigned short *) &buf_rgb[2*ping_out*color*out_y_width],
                out_y_width); /* in L2 */
        ycbcr422pl16_to_rgb565_asm(cmatrix,
                &buf_y3 [(out_y_width & ~7) + 8],
                &buf_cr3[0], &buf_cb3[0],
                (unsigned short *) &buf_rgb[(1 + 2*ping_out)*color*out_y_width],
                out_y_width); /*<-in L2 */

        ping_out ^= 1;
     }

     /* this doesn't need to be double buffered as it writes out */
     /* so it has no dependency as in the input buffer case */
     /* while only a 1/2 fram is being computed at once the
        output lines are wrtten into every other line so
        it increments 2lines for each 1 */

     /* first time around we do not want this to occur but everything else
        is ok to keep updateing, only the DAT_copy needs delaying */

     if (write_buf)
     {
        if (!first) DAT_wait(out_id0);   
        out_id0 = DAT_copy(&buf_rgb[2*pong_out*color*out_y_width],
                           &image_rgb[out_color_offset+display_pixels*color],
                           color*out_y_width);
        display_pixels += display_width;

        out_id0 = DAT_copy(&buf_rgb[(1+2*pong_out)*color*out_y_width], 
                           &image_rgb[out_color_offset+display_pixels*color],
                           color*out_y_width);
        display_pixels += display_width;
        first = 0;
        pong_out ^= 1;
     }

     if (y_scaled == 2 && c_scaled > 0)
     {        
        /* this becomes 2 display_pixels */
        display_lines += 2;
        y_scaled = 0; c_scaled = 0;
        write_buf = 1;
     } else {
        write_buf = 0;
     }

    
  }  /* end while */
  if (write_buf)
  {
    DAT_wait(out_id0);   
    out_id0 = DAT_copy(&buf_rgb[2*pong_out*color*out_y_width],
                       &image_rgb[out_color_offset+display_pixels*color],
                       color*out_y_width);
    display_pixels += display_width;

    out_id0 = DAT_copy(&buf_rgb[(1+2*pong_out)*color*out_y_width], 
                       &image_rgb[out_color_offset+display_pixels*color],
                       color*out_y_width);
    display_pixels += display_width;
  }

  /* clean up at end of for loop for each strip */
  DAT_wait(in_id_cr);
  DAT_wait(in_id_cb);
  DAT_wait(in_id_y);
  DAT_wait(out_id0);
  out_color_offset += color*out_y_width;

  y_filt_state[0] = y_filt_state[4];   /* y filter count */
  c_filt_state[0] = c_filt_state[4];   /* c filter count */ 

 }/* end for */                                          
} 


/* ========================================================================= */
/*              Copyright (C) 2000 Texas Instruments Incorporated.           */
/*                              All Rights Reserved                          */
/* ========================================================================= */
                                                                   
